import com.aliyun.odps.udf.UDF;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;

/**
 * UDF that extracts a single named field from the HTML detail page of
 * database #1 (imported cosmetics, drug administration registry).
 *
 * <p>The page is read positionally: every {@code <td>} in the document is
 * numbered from 0, cell 0 is skipped, the text of each even-numbered cell is
 * taken as a value, and for cell {@value #LINK_CELL_INDEX} the {@code href} of
 * its first {@code <a>} is taken instead of the text. The collected values are
 * matched one-to-one, in order, with the field names in {@link #KEYS}.
 *
 * @Author: lvfeng
 * @CreateTime: 2019-02-28 20:29
 */
public class SpliderAnalysis1Goods extends UDF {

    /** Supported field names, in the order their values are collected from the page. */
    private static final String[] KEYS = {
        "title", "state", "title_english", "category", "product_area", "product_company",
        "product_company_english", "address", "china_company", "china_address", "approval",
        "approval_date", "validity_date", "content", "remark3", "tech_spec_url", "explainz"
    };

    /** Index of the {@code <td>} whose link target, rather than its text, is collected. */
    private static final int LINK_CELL_INDEX = 32;

    /**
     * Returns the value of one field of the detail page.
     *
     * @param key        field name; must be one of {@link #KEYS}
     * @param targetHtml raw HTML of the detail page
     * @return the trimmed cell text (or link target for {@code tech_spec_url}) of the
     *         requested field; {@code null} when {@code key} is blank or unknown, when
     *         {@code targetHtml} is {@code null}, when the page has no {@code <td>} cells,
     *         when fewer values than field names were found, or when the link cell
     *         contains no {@code <a>} element
     * @throws ParseException never thrown by this implementation; the clause is kept so
     *                        that existing callers which catch it still compile
     */
    public String evaluate(String key, String targetHtml) throws ParseException {
        // A null page is treated like any other unparsable input instead of failing with an NPE.
        if (StringUtils.isBlank(key) || targetHtml == null) {
            return null;
        }
        int fieldIndex = indexOfKey(key);
        if (fieldIndex < 0) {
            return null;
        }

        // Same normalisation as before: double quotes become single quotes, CRLF line
        // breaks are dropped, and doubled single quotes are collapsed.
        String html = targetHtml.replace("\"", "'").replace("\r\n", "").replace("''", "'");
        Elements cells = Jsoup.parse(html).select("td");
        if (cells.isEmpty()) {
            return null;
        }

        List<String> values = collectValues(cells);
        if (values.size() < KEYS.length) {
            return null;
        }
        // NOTE(review): the 17th value ("explainz") is whatever even-numbered cell follows
        // the link cell, i.e. cell 34. If the page has an empty spacer row between the link
        // row and the note row, this yields the spacer's empty text rather than the note --
        // confirm against a real page before relying on this field.
        return values.get(fieldIndex);
    }

    /**
     * Tells whether {@code arr} contains an element equal to {@code targetValue}.
     *
     * @param arr         array to search; {@code null} is treated as empty
     * @param targetValue value to look for; {@code null} never matches
     * @return {@code true} if some element equals {@code targetValue}
     */
    public static boolean containsStr(String[] arr, String targetValue) {
        if (arr == null || targetValue == null) {
            return false;
        }
        for (String candidate : arr) {
            // Called on targetValue so that null array elements are simply skipped.
            if (targetValue.equals(candidate)) {
                return true;
            }
        }
        return false;
    }

    /** Returns the position of {@code key} in {@link #KEYS}, or -1 if it is not a known field. */
    private static int indexOfKey(String key) {
        for (int i = 0; i < KEYS.length; i++) {
            if (KEYS[i].equals(key)) {
                return i;
            }
        }
        return -1;
    }

    /** Collects the field values from the page's cells in document order. */
    private static List<String> collectValues(Elements cells) {
        List<String> values = new ArrayList<>();
        // Cell 0 is skipped; odd-numbered cells are not collected at all.
        for (int i = 1; i < cells.size(); i++) {
            Element cell = cells.get(i);
            if (i == LINK_CELL_INDEX) {
                Element link = cell.getElementsByTag("a").first();
                // A placeholder keeps later values aligned with KEYS when the link is missing.
                values.add(link == null ? null : link.attr("href").trim());
            } else if (i % 2 == 0) {
                values.add(cell.text().trim());
            }
        }
        return values;
    }
}
